cap log close
cap log using ///
	"D:/Files/Shasta County/Aria_Scratch/build_trumpia.text", t replace 
/*****************************************************************************/	
dis as error "{hline 59}" _newline ///
	as text "Ran by: " 	_column(16) c(username) 		_newline ///
	as text "Date: " 	_column(16) c(current_date) 	_newline ///
	as text "Time: " 	_column(16) c(current_time) 	_newline ///
	as error "{hline 59}"
	
/****************************************************************************
// Title: 	build_trumpia.do 
// Where? 	"D:/Files/Shasta County/Aria_Scratch"
// by: 		Aria		
// Purpose: This do file reads input and output Trumpia csv and excel files.
				
// Edits/To-Do/Comments:
	- [MM/DD/YY] - [name] ...
------------------------------------------------------------------------------
TOC
------------------------------------------------------------------------------
// Part 0: 		Setting up the environment
// Part 1: 		Reading and cleaning *Output for Trumpia*
// Part 2:		RCT Round & Date pairs
// Part 3:  	Reading and cleaning *Input from Trumpia* Distribution files
// Part 4:  	Reading and cleaning *Input from Trumpia* Add_ files
// Part 5:  	Reading and cleaning *Input from Trumpia* Failed files
// Part 6:  	Merge
*****************************************************************************/
set more off, perm
global dir "D:/Files/Shasta County"
global data "D:/Files/Shasta County/CarlyWill_Scratch/Data"
global temp "D:/Files/Shasta County/CarlyWill_Scratch/_temp"

/****************************************************************************
	Part 1: Reading and cleaning *Output for Trumpia*
*****************************************************************************/
cd "${dir}/Output for Trumpia/output_for_trumpia_clean"

/* List file names. */
local filelist: dir . files "*.csv"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import delimited using "`f'", stringcols(_all) varname(1) clear
		qui gen court_dateD = subinstr( "`f'",".csv","",.)
		qui replace court_dateD = subinstr(court_dateD, "trumpia","",.)
		qui destring court_dateD, replace

		tempfile save`m'
		qui save "`save`m''"	
	local m `++m'
}

local obs = `m'-1
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}

/* missing phone number? */
gen missing_phone = phone == "0"
la var missing_phone "Missing Phone Number" 

la var court_dateD 	" Court Date"
la var court_time 	"Court Time"
la var court_dept 	"Court Dept"
la var phone 		"Phone Number"
la var dow 			"Court Day of Week"
la var month 		"Court Month"
la var day 			"Court Day of Month"
la var year 		"Court Year"
la var trumpia_id 	"Trumpia ID (constructed)"

foreach v of varlist _all {
	capture confirm string variable `v'
	if !_rc {
		qui replace `v' = strlower(`v')
		qui replace `v' = ltrim(`v')
		qui replace `v' = rtrim(`v')
	}
}
egen rct_round = group(court_dateD)
la var rct_round "RCT Round"

order court_dateD rct_round trumpia_id 

save "${data}/trumpia_input.dta", replace 

/*************************************************************************
	Part 2:	RCT Round & Date pairs
*************************************************************************/
use "${data}/trumpia_input.dta", clear
	keep court_dateD rct_round
	duplicates drop 
save "${data}/round_date.dta", replace 

/*************************************************************************
	Part 3:	Trumpia Distribution
*************************************************************************/
cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Distribution*"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import delimited using "`f'", stringcols(_all) varname(1) clear
	
		/* List is RCT round that need cleaning */
		keep list mobile
		
		/* phone numnber */
		rename mobile phone
		qui replace phone = subinstr(phone, "+1 ", "", .)
		
		/* Labeling */
		la var phone "Phone Numnber" 
		
		qui replace list = "1st RCT Text" if strpos(list, "First")>0
		gen fst_space = strpos(list, " ")
		gen rct_round = substr(list, 1, fst_space)
		drop fst_space
		drop list
		
		local xs " "th" "nd" "rd" "st" "
		foreach x of local xs {
			qui replace rct_round = subinstr(rct_round, "`x'", "",.)
		}
		qui destring rct_round, replace
		replace rct_round = (rct_round-1) if rct_round > 16
		
		order rct_round 
		qui compress
		
		merge m:1 rct_round using "${dir}/CarlyWill_Scratch/Data/round_date.dta", ///
			keep(match master) nogen
			
		levelsof rct_round, local(j)
		qui save "${dir}/CarlyWill_Scratch/_temp/dist_`j'", replace
	
		tempfile save`m'
		qui save "`save`m''"
	local m `++m'
}

local obs = `m'-1
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
		qui append using "`save`i''"
}
	
save "${data}/trumpia_dist.dta", replace

/*************************************************************************
	Part 4:	Trumpia Add_
*************************************************************************/
cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Add_*"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import delimited using "`f'", stringcols(_all) varname(1) clear
		qui gen _source = subinstr( "`f'",".csv","",.)
		gen court_dateD = regexs(0) if(regexm(_source, "[0-9][0-9][0-9][0-9][0-9]$"))
		qui destring court_dateD, replace
		drop _source
		
		tempfile save`m'
		qui save "`save`m''"
		
	local m `++m'
}

local obs = `m'-1	
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}


rename v10 reason_phone
foreach v of varlist v9 v11 {
	qui replace reason_phone = reason_phone + `v' if strpos(`v', "[phone]")>0
	drop `v' 
}
	

replace reason_phone = invalidmessage if missing(reason_phone)

drop if strpos(reason_phone, "duplicated")>0
drop if strpos(reason_phone, "dow")>0

gen 	reason_catP = "" 
replace reason_catP = "Blocked (Contact)" 		if strpos(reason_phone, "blocked")>0
replace reason_catP = "Invalid Length (Contact)" if strpos(reason_phone, "length")>0
la var  reason_catP	"Fail Reason Category"

gen contact_related_nonzero = phone != "0"
la var contact_related_nonzero "Non-Missing Contact Fail"

la var court_dateD 	"Court Date"
la var phone 		"Phone Number"
la var trumpia_id 	"Trumpia ID (constructed)"

keep court_dateD trumpia_id phone reason_catP contact_related_nonzero 

qui compress

merge m:1 court_dateD using "${data}/round_date.dta", ///
	keep(match master) nogen
	
save "${data}/trumpia_add.dta", replace
/*------------------------------------------------*/
use "${data}/trumpia_add.dta", clear	
	
	levelsof rct_round, local(j)
	foreach v in `j' {
	use "${data}/trumpia_add.dta" if rct_round ==  `v', replace
	qui save "${temp}/add_`v'", replace
	}

/*************************************************************************
	Part 5: Trumpia Failed
*************************************************************************/
cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Failed*"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import excel "`f'", sheet("Sheet2") firstrow allstring clear
	
		qui gen _source = subinstr( "`f'",".xlsx","",.)
		qui replace _source = subinstr(_source, "failedtexts", "",.)
		gen day = regexs(0) if regexm(_source, "[0-9]*$")
		qui destring day, replace
		qui gen mdate = regexs(0) if regexm(_source, "^[a-z]+")
		gen month = month(date(mdate, "M"))
		drop mdate
		gen year = 2021
		gen court_dateD = mdy(month, day, year)
		qui replace court_dateD = court_dateD + 3
		qui replace court_dateD = (court_dateD-1) if _source == "jun9"
		qui replace court_dateD = (court_dateD-2) if _source == "may24"
	
		qui drop if regexm(Number, "[0-9]") == 0
		qui drop if Number == "0"
		foreach var of varlist _all {
			capture assert missing(`var')
			if !_rc {
				qui drop `var'
			}
		}
		drop Name
		
		rename Number phone 
		
		tempfile save`m'
		qui save "`save`m''"
	local m `++m'
}

local obs = `m'-1
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}

rename FailReason reason_carrier 
la var reason_carrier "Failed Reason Carrier-Related"

gen 	reason_catC ="" 
replace reason_catC = "Will Retry (Carrier)" 		if strpos(reason_carrier, "retried")>0
replace reason_catC = "Illegal or no SMS (Carrier)" 	if strpos(reason_carrier, "For example")>0
replace reason_catC = "Anti-Spam Program (Carrier)" 	if strpos(reason_carrier, "anti-spam")>0
replace reason_catC = "No Text Support (Carrier)" 	if strpos(reason_carrier, "does not support text")>0

la var reason_catC	"Fail Reason Category"
	
keep phone reason_catC court_dateD
qui compress 

merge m:1 court_dateD using "${data}/round_date.dta", ///
	keep(match master) nogen
	
	
save "${data}/trumpia_fail.dta", replace
/*------------------------------------------------*/
use "${data}/trumpia_fail.dta", clear	
	
	levelsof rct_round, local(j)
	foreach v in `j' {
	use "${data}/trumpia_fail.dta" if rct_round ==  `v', replace
	qui save "${temp}/fail_`v'", replace
	}


		tempfile save`m'
		qui save "`save`m''"
/*
levelsof FailReason
	
	`"Message will be retried by carrier"' 
	`"SMS could not be delivered due to a phone error. 
		For example, the phone does not support SMS or 
		is illegally registered on the network."' 
	`"The carrier rejected the message. 
		This can be due to short code messaging blocked, anti-spam policies, or
			their wireless plan."'
	`"The mobile number does not support text messages."'
*/

/*************************************************************************
	Part 6: Merge
*************************************************************************/
cd "${data}"

local mvar_11 court_dateD rct_round phone trumpia_id
local mvar_m1  court_dateD rct_round phone

use "trumpia_input.dta", clear

	merge 1:1 `mvar_11' using "trumpia_add.dta"
	gen contact_related = _merge == 3
	la var contact_related "Contact-Related Fail"
	drop _merge
	
	merge m:1 `mvar_m1' using "trumpia_fail.dta"
	gen carrier_related = _merge == 3
	la var carrier_related "Carrier-Related Fail"
	drop _merge	
	
	merge m:1 `mvar_m1' using "trumpia_dist.dta"
	gen distribution = _merge == 3
	la var distribution "Record in Distribution Files"
	drop _merge	
	
gen 	fail_source = ""
replace fail_source = "Contact" if contact_related == 1
replace fail_source = "Carrier" if carrier_related == 1
la var 	fail_source "Fail Source"

egen reason_cat = concat(reason_catP reason_catC)
la var reason_cat	"Fail Reason Category"
 
gen     stop = 0
*XX
*replace stop = 1 if (distribution == 0 & contact_related == 0 & carrier_related==0) 
replace stop = 1 if (distribution == 0 & contact_related == 0 )   
la var 	stop "Replied Stop"

*replace distribution = 0 if carrier_related == 1

egen sent = rowtotal(distribution  stop)
la var sent "Text is Sent"

*XX CW CHANGE XX*
*	"court_dateD rct_round trumpia_id court_time court_dept phone dow month day year sent missing_phone reason_cat fail_source "

local  v  ///
"court_dateD rct_round trumpia_id court_time court_dept phone dow month day year sent missing_phone reason_cat fail_source missing_phone contact_related contact_related_nonzero carrier_related distribution"
order `v' 
keep `v'

save "${data}/trumpia_result.dta", replace

/**************************************************************/
/*  Raading Outcome Files	*/
/**************************************************************/

cd "${data}"
local fs "Cases Charges CourtDates Offenders"
foreach f of local fs {
	import excel Outcome`f',  firstrow  case(lower) allstring clear
	rename personkey person_key
	drop if missing(person_key)

	save "Outcome`f'.dta", replace
}

/**************************************************************/
/*  Merging in the criminal history from `OutcomeOffenders' into */
/*  `OutcomeCourtDates' which is our main outcome-related dataset */
/**************************************************************/
/* 0 */
use "${data}/GNRM_visit.dta", clear
append using "${data}/GNRM_night.dta"
	drop _merge
	order  fullname person_key court_dateD
	sort  fullname person_key court_dateD
by fullname person_key court_dateD: keep if _n==1
count /* 31 */

drop if court_dateD < 22379 

keep person_key court_dateD days_before homeless_flag assignment
gen gnrm = 1

save "${data}/rct_GNRM.dta", replace

/* 1: crim history */
use "${data}/OutcomeCourtDates.dta", clear

gen court_dateD = date(courtdate, "MDY")
format court_dateD %d

drop if court_dateD > 22379 - 1

destring caseyear, replace
destring casenumb, replace

sort person_key caseyear casenumb fullcase

gen prev_fta0 = fta == "Y"
by person_key fullcase , sort: egen prev_fta_caselevel = max(prev_fta0)
drop prev_fta0

sort person_key caseyear casenumb fullcase
duplicates drop  person_key caseyear casenumb fullcase, force

by person_key: gen new_case = fullcase != fullcase[_n-1]
by person_key: gen court_num = sum(new_case)
by person_key: egen max_prev_case = max(court_num)


by person_key: gen prev_fta_num = sum(prev_fta_caselevel)
by person_key: egen max_prev_fta = max(prev_fta_num)

keep person_key  max_prev_case max_prev_fta

gen prev_case_cons = max_prev_case > 0 
gen prev_fta_cons = max_prev_fta > 0 

duplicates drop 

la var max_prev_case  "# previous cases (since 2016)"
la var max_prev_fta "# previous FTAs (since 2016)"
la var  prev_case_cons "=1 if had a case before (since 2016)"
la var prev_fta_cons "=1 if FTA'd before"
save "${data}/CrimHistory.dta", replace


/****************** (2) ************/



use "${data}/OutcomeCourtDates.dta", clear

gen court_dateD = date(courtdate, "MDY")
format court_dateD %d
 
rename middlename middlname

merge m:1 person_key using "${data}/OutcomeOffenders.dta", nogen

drop ac ad ae af ag ah ai

gen dobD=date(birthdate, "MDY")
format dobD %d

local names last first middl
foreach n of local names { 
qui replace `n'name = ltrim(`n'name)
qui replace `n'name = rtrim(`n'name)
}
gen name = lastname + ", " + firstname + " " + middlname
drop lastname firstname middlname

qui compress

gen dow = dow(court_dateD)
la var dow "Day of Week"

gen white = race == "W"
la var white "White Defendant"
gen race_miss = race == ""
la var race_miss "Race Info Missing" 

gen male = sex == "M"
la var male "Male"
gen gender_miss = sex == ""
la var gender_miss "Gender Info Missing"

gen californian = state == "CA"
la var californian "CA Resident" 

gen state_miss = state == ""
la var state_miss "State of Residence Missing"

gen shasta_code = areacode == "530"
la var shasta_code "=1 Shasta County Area Code" 

gen age = round((court_dateD - dobD) / 365.25)
la var age "Age"

local vars ///
	cases hlcases pend ftacase ftacourt priorcase
foreach v of local vars {
destring `v' , replace
}

gen prev_case = priorcase > 0
la var prev_case "=1 if had a case previously"

gen prev_fta = ftacase > 0
la var prev_fta "=1 if had a FTA"

save "${data}/OutcomeCourtDatesAndHistory_v0.dta", replace

/**************************************************************/
/* (1) merge TRUMPIA results with COURT  */
/**************************************************************/

use "${data}/trumpia_result.dta", clear

local mergevar "court_dateD court_dept  phone"

*XX CW
preserve
keep if missing_phone==1
tempfile missing 
destring trumpia_id, force replace

merge 1:1 trumpia_id court_dateD court_dept using "D:\Files\Shasta County\CarlyWill_Scratch\Data\missing_help_cw" 
keep if _merge==3
drop _merge

merge 1:m person_key court_dateD  using "${data}/OutcomeCourtDatesAndHistory_v0.dta", force

keep if _merge == 3 
drop _merge

gen rctfta = fta == "Y"
by person_key court_dateD , sort: egen fta_any = max(rctfta)
by person_key court_dateD , sort: keep if _n == 1
save `missing'
restore
*XX

*use `missing', clear

count /* 1,164 */ 
drop if missing_phone== 1 /* (201 observations deleted) */
keep court_dateD court_time court_dept phone sent reason_cat fail_source sent missing_phone  missing_phone contact_related contact_related_nonzero carrier_related distribution rct_round
format court_dateD %d

duplicates drop `mergevar' , force /* (1 observation deleted) */
count /* 962 */ 

merge 1:m `mergevar' using "${data}/courts_cw.dta", nogen


/*
    Result                           # of obs.
    -----------------------------------------
    not matched                         1,376
        from master                         0  (_merge==1)
        from using                      1,376  (_merge==2)

    matched                               963  (_merge==3)
    -----------------------------------------
*/

replace reason_cat = "Missing Phone #" if phone == "0"
replace fail_source = "Missing Phone #" if phone == "0"

/* cleaning case id number */
split case_id, p(-)
/* case_id1: CRM */
/* case_id2: year */
/* case_id3: case number with leading zeores */ 
/* case_id4: I think useless. It's like 0002 */ 
replace case_id1 = "CRM"
destring case_id3, replace
tostring case_id3, gen(case_num)
order case_num
gen fullcase = case_id1 + "-" + case_id2 + "-" + case_num
drop case_id*

count

save "${data}/ExperimentalData.dta", replace

/*********************************************/
/* */
/*********************************************/
use "${data}/OutcomeCourtDatesAndHistory_v0.dta", clear

merge m:1 person_key court_dateD  using "${data}/ExperimentalData.dta", force
/*
    Result                           # of obs.
    -----------------------------------------
    not matched                       157,660
        from master                   157,601  (_merge==1)
        from using                         59  (_merge==2)

    matched                             2,280  (_merge==3)
    -----------------------------------------
*/

*** keep if casetype == "CRM"
preserve
keep if _merge == 2
drop _merge
foreach var of varlist _all {
	capture assert missing(`var')
	if !_rc {
		drop `var'
	}
}
*merge 1:m court_dateD fullcase dobD name using ///`
*	"${dir}/Aria_Scratch/Data/OutcomeCourtDatesAndHistory.dta"
save "${data}/NotMerged.dta", replace
restore 

keep if _merge == 3
drop _merge

gen rctfta = fta == "Y"
by person_key court_dateD , sort: egen fta_any = max(rctfta)
by person_key court_dateD , sort: keep if _n == 1

gen assign_treatment = assignment == "TREATMENT"
la var assign_treatment "Assigned Treatment"

gen homeless = homeless_flag == "T"
la var homeless "Homeless"

gen treated = sent
replace treated = 0 if  treated == .
la var treated "Treated"

cap drop did
gen did = assign_treatment*homeless 
la var did "Treated x Control"


merge m:1 person_key using "${data}/CrimHistory.dta", keep(match master)

foreach v of varlist max_prev_case max_prev_fta prev_case_cons prev_fta_cons {
	replace `v' = 0 if _merge == 1 
	}
drop _merge
	
	
merge m:1 person_key court_dateD using "${data}/rct_GNRM.dta"
 
gen fta_gnrm = fta_any
replace fta_gnrm = 0 if gnrm == 1 & assign_treatment == 1/* (19 real changes made) */

save "${data}/_RCT_Sample.dta", replace


/*****************************************************************************/
dis as error "{hline 59}" _newline ///
	as text "Ran by: " 	_column(16) c(username) 		_newline ///
	as text "Date: " 	_column(16) c(current_date) 	_newline ///
	as text "Time: " 	_column(16) c(current_time) 	_newline ///
	as error "{hline 59}"
/*****************************************************************************/
cap log close 
/*************************************************************************

*************************************************************************/	
use "${data}/ExperimentalData.dta", clear

*to get ftas

merge 1:m person_key court_dateD  using "${data}/OutcomeCourtDatesAndHistory_v0.dta", force

keep if _merge == 3 | _merge==1
drop _merge

gen rctfta = fta == "Y"
by person_key court_dateD , sort: egen fta_any = max(rctfta)
by person_key court_dateD , sort: keep if _n == 1


*add back in missing phone
append using `missing', force
gen records = 1

*XX
drop if rct_round==.



gen flag = 1 if inlist(rct_round, 2,17,23,24)
la var flag "Incomplete Trumpia Export"

/*
drop dist_net
g dist_net=distribution if flag==.
	replace dist_net =0 if  carrier_related==1 & flag==.

*for summary stats on those delivered vs. asing stop 
sum fta_any if dist_net==1
sum fta_any if distribution==0 & contact_related==0 & flag==.

bysort homeless_flag: sum fta_any if dist_net==1
bysort homeless_flag: sum fta_any if distribution==0 & contact_related==0 & flag==.
*/
collapse ///
	(sum) records missing_phone contact_related contact_related_nonzero ///
		carrier_related distribution fta_any ///
		(min) court_dateD (max) flag, by(rct_round homeless_flag)

la var records 		"Court Records"
la var missing_phone "Court Records Missing Phone #"

gen records_valid = (records - missing_phone)
la var records_valid "Court Records Non-Missing Phone #"

la var contact_related "Contact Related Fail (_add)"
la var contact_related_nonzero  "Contact Related Fail Non-Missing Phone #(_add)"
la var carrier_related "Carrier-Related Fail (_fail)"
la var distribution "Record in Distribution Files"

gen distribution_net = distribution - carrier_related
replace distribution_net = 0 if distribution == 0 
la var distribution_net "Texts Successfully Sent (_dist - _fail)"

la var court_dateD "Court Date"

*XXCW	
gen stop = records-contact_related-distribution
*gen stop = (records_valid - distribution- contact_related_nonzero)

la var stop "Replied STOP"



/* for rounds 2,17,23,24; we know people got the text - regardless of  */
/* replying STOP or not; they have received it */
replace distribution_net = stop if flag == 1 

order rct_round court_dateD records missing_phone records_valid contact_related ///
	contact_related_nonzero distribution carrier_related distribution_net stop flag fta_any 
 
export excel using "What Happens to the Texts (Numbers-By wave) homeless_flag.xlsx",  firstrow(varlabels) replace

/***********************************************
All Waves except incomplete ones G
***********************************************/
drop if flag == 1

collapse (sum)  records missing_phone records_valid contact_related contact_related_nonzero ///
	 distribution distribution_net carrier_related stop  fta_any , by(homeless_flag) 
	
la var records 		"Court Records"
la var missing_phone "Court Records Missing Phone #"
la var records_valid "Court Records Non-Missing Phone #"
la var contact_related "Contact Related Fail (_add)"
la var contact_related_nonzero  "Contact Related Fail Non-Missing Phone #(_add)"
la var carrier_related "Carrier-Related Fail (_fail)"
la var distribution "Record in Distribution Files" 
la var distribution_net "Texts Successfully Sent (_dist - _fail)"
la var stop "Replied STOP"

order  records missing_phone records_valid contact_related ///
	contact_related_nonzero distribution carrier_related distribution_net stop fta_any 
bysort	homeless_flag: sum
export excel using "What Happens to the Texts (Numbers Total - Flag) homeless_flag.xlsx",  firstrow(varlabels) replace

/* same thing but for entire sample*/
collapse (sum)  records missing_phone records_valid contact_related contact_related_nonzero ///
	 distribution distribution_net carrier_related stop fta_any 
	export excel using "What Happens to the Texts (Numbers Total - Flag) full sample.xlsx",  firstrow(varlabels) replace 
/*****************************************************************************/
dis as error "{hline 59}" _newline ///
	as text "Ran by: " 	_column(16) c(username) 		_newline ///
	as text "Date: " 	_column(16) c(current_date) 	_newline ///
	as text "Time: " 	_column(16) c(current_time) 	_newline ///
	as error "{hline 59}"
/*****************************************************************************/
cap log close
